# Download the CSSE daily report whose file name is the date two days before
# today (UTC), formatted as MM-DD-YYYY and suffixed with ".csv".
df_csse <- readr::read_csv(
  paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/",
    format(lubridate::today(tzone = "UTC") - 2, "%m-%d-%Y"),
    ".csv"
  )
)

# Print the imported table.
df_csse

# Column-by-column summary. The pipe is kept on purpose: skim() labels its
# input "Piped data" in the summary header when called this way.
df_csse %>% skimr::skim()
| Name | Piped data |
| Number of rows | 3981 |
| Number of columns | 14 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| numeric | 9 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Admin2 | 712 | 0.82 | 3 | 41 | 0 | 1924 | 0 |
| Province_State | 171 | 0.96 | 3 | 44 | 0 | 576 | 0 |
| Country_Region | 0 | 1.00 | 2 | 32 | 0 | 192 | 0 |
| Combined_Key | 0 | 1.00 | 4 | 60 | 0 | 3981 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| FIPS | 717 | 0.82 | 32419.86 | 18023.68 | 66.00 | 19052.50 | 30070.00 | 47041.50 | 99999.00 | ▆▇▆▁▁ |
| Lat | 88 | 0.98 | 35.97 | 13.02 | -52.37 | 33.27 | 37.95 | 42.21 | 71.71 | ▁▁▁▇▁ |
| Long_ | 88 | 0.98 | -71.99 | 53.64 | -178.12 | -96.60 | -86.85 | -77.49 | 178.06 | ▁▇▁▁▁ |
| Confirmed | 0 | 1.00 | 28272.87 | 141401.94 | 0.00 | 974.00 | 2704.00 | 10591.00 | 3639501.00 | ▇▁▁▁▁ |
| Deaths | 0 | 1.00 | 627.35 | 3558.87 | 0.00 | 15.00 | 48.00 | 168.00 | 107388.00 | ▇▁▁▁▁ |
| Recovered | 0 | 1.00 | 15951.92 | 106204.67 | 0.00 | 0.00 | 0.00 | 0.00 | 2540293.00 | ▇▁▁▁▁ |
| Active | 2 | 1.00 | 11698.08 | 91453.73 | -1614614.00 | 773.50 | 2147.00 | 6417.50 | 3523341.00 | ▁▇▁▁▁ |
| Incident_Rate | 89 | 0.98 | 7696.69 | 3814.12 | 0.00 | 5298.11 | 8130.74 | 10183.59 | 32387.39 | ▅▇▁▁▁ |
| Case_Fatality_Ratio | 42 | 0.99 | 2.03 | 3.25 | 0.00 | 1.16 | 1.75 | 2.49 | 158.57 | ▇▁▁▁▁ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| Last_Update | 0 | 1 | 2020-08-04 02:27:56 | 2021-02-25 05:24:57 | 2021-02-25 05:24:57 | 8 |
# Show the rows of the daily report for Japan. Country_Region is converted to
# a factor before filtering, so the printed result keeps every country level.
# (To list the available countries, inspect levels() of the factor column.)
dplyr::filter(
  dplyr::mutate(df_csse, Country_Region = forcats::as_factor(Country_Region)),
  Country_Region == "Japan"
)
# Load the global confirmed-cases time series and reshape it to long format:
# every column other than the two region columns becomes a (date, cum) pair.
# NOTE(review): `Providence_State` looks like a typo for "Province_State"; the
# name is kept because later code in this script filters on it.
df_csse_ts <- readr::read_csv(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
) %>%
  # Coordinates are dropped; to keep them, skip this step and exclude Lat and
  # Long from `cols` below as well.
  dplyr::select(-c(Lat, Long)) %>%
  dplyr::rename(
    Providence_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  tidyr::pivot_longer(
    cols = -c(Providence_State, Country_Region),
    names_to = "date",
    values_to = "cum"
  ) %>%
  dplyr::mutate(
    Country_Region = forcats::as_factor(Country_Region),
    # Column headers are parsed as month-day-year dates.
    date = lubridate::mdy(date)
  )

# Print the long-format table.
df_csse_ts
# Show only the rows of the time series whose Country_Region is "US".
dplyr::filter(df_csse_ts, Country_Region == "US")
# Disabled follow-up step (would be piped onto the filter above):
# %>%
# dplyr::mutate(n = lagdiff(cum), ma7 = ma7(n), ma28 = ma28(n))
# Per-country series for a fixed list of Asian countries.
# lagdiff(), ma7() and ma28() are helpers defined outside this section;
# presumably day-over-day difference and 7-/28-day moving averages -- confirm.
df_asia_daily <- dplyr::filter(
  df_csse_ts,
  Country_Region %in% c(
    "India",
    "Indonesia",
    "Japan",
    "Korea, South",
    "Mongolia",
    "Philippines",
    "Singapore",
    "Taiwan*",
    "Thailand",
    "Vietnam"
  )
) %>%
  # Optional (disabled): keep only rows without a province value.
  # dplyr::filter(is.na(Providence_State)) %>%
  # Sort by date first so the grouped helpers see rows in date order.
  dplyr::arrange(date) %>%
  dplyr::group_by(Country_Region) %>%
  dplyr::mutate(n = lagdiff(cum)) %>%
  dplyr::mutate(ma7 = ma7(n), ma28 = ma28(n)) %>%
  dplyr::ungroup()

# Print the result.
df_asia_daily
# Hong Kong series: rows where Country_Region is "China" and Providence_State
# is "Hong Kong", sorted by date, with n / ma7 / ma28 added by helpers that
# are defined outside this section.
subset <- df_csse_ts %>%
  dplyr::filter(Country_Region == "China", Providence_State == "Hong Kong") %>%
  dplyr::arrange(date) %>%
  dplyr::group_by(Country_Region) %>%
  dplyr::mutate(n = lagdiff(cum)) %>%
  dplyr::mutate(ma7 = ma7(n), ma28 = ma28(n)) %>%
  dplyr::ungroup()

# Plot settings.
title <- "【香港】陽性者数(単日)"
xlab <- ""
ylab <- "陽性者数"
# `cum` is divided by this factor when drawn and the secondary axis multiplies
# it back, so both series share one panel.
sec_scale <- 50
dbreaks <- "2 month"
dlabels <- "%y-%m"
# dvline <- lubridate::as_date("2021-01-08")
# Only referenced by the disabled facet_wrap() calls below.
ncol <- 2

# Bars: n; thin line: ma7; default-width line: cum / sec_scale.
# `subtitle` and `caption` are not assigned in this section -- assumed to be
# defined elsewhere in the script (TODO confirm).
# NOTE(review): the explicit name on scale_y_continuous() likely takes
# precedence over `y = ylab` in labs() -- confirm.
ggplot2::ggplot(subset, ggplot2::aes(x = date)) +
  ggplot2::geom_bar(
    ggplot2::aes(y = n),
    stat = "identity",
    alpha = 0.25,
    width = 1.0
  ) +
  ggplot2::geom_line(
    ggplot2::aes(y = ma7),
    linetype = "solid",
    size = 0.25
  ) +
  ggplot2::geom_line(ggplot2::aes(y = cum / sec_scale)) +
  # ggplot2::geom_vline(xintercept = dvline, size = 0.2) +
  # ggplot2::facet_wrap(~ key, ncol = ncol, scales = "free_y") +
  # ggplot2::facet_wrap(~ key, ncol = ncol, scales = "fixed") +
  ggplot2::scale_y_continuous(
    name = "陽性者数(棒)・移動平均(細線)",
    sec.axis = ggplot2::sec_axis(
      ~ . * sec_scale,
      name = "陽性者数累計(太線)"
    )
  ) +
  ggplot2::scale_x_date(date_breaks = dbreaks, date_labels = dlabels) +
  ggplot2::labs(
    title = title,
    subtitle = subtitle,
    caption = caption,
    x = xlab,
    y = ylab
  ) +
  ggplot2::theme(legend.position = "none")
# Per-country series for a fixed list of Western European countries.
# Only rows with a missing Providence_State are kept (presumably the
# country-level rows rather than separately listed territories -- confirm).
# lagdiff(), ma7() and ma28() are helpers defined outside this section.
df_Western_daily <- dplyr::filter(
  df_csse_ts,
  Country_Region %in% c(
    "France",
    "Germany",
    "Italy",
    "Spain",
    "Netherlands",
    "Norway",
    "Sweden",
    "United Kingdom"
  ),
  # Disabled alternative: merge country and province into a single label.
  # tidyr::unite(col = "Country", Country_Region, Providence_State, sep = ", ")
  is.na(Providence_State)
) %>%
  dplyr::arrange(date) %>%
  dplyr::group_by(Country_Region) %>%
  dplyr::mutate(n = lagdiff(cum)) %>%
  dplyr::mutate(ma7 = ma7(n), ma28 = ma28(n)) %>%
  dplyr::ungroup()

# Print the result.
df_Western_daily